{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "import numpy as np\n",
    "import wurlitzer\n",
    "\n",
    "import trtlab\n",
    "\n",
    "# this allows us to capture stdout and stderr from the backend c++ infer-runtime\n",
    "display_output = wurlitzer.sys_pipes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "!/work/models/setup.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Local Inference Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING: Logging before InitGoogleLogging() is written to STDERR\n",
      "I0204 22:01:27.543411   925 inference_manager.cc:64] -- Initialzing TensorRT Resource Manager --\n",
      "I0204 22:01:27.543426   925 inference_manager.cc:65] Maximum Execution Concurrency: 4\n",
      "I0204 22:01:27.543429   925 inference_manager.cc:66] Maximum Copy Concurrency: 8\n"
     ]
    }
   ],
   "source": [
    "with display_output():\n",
    "    manager = trtlab.InferenceManager(max_exec_concurrency=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "I0204 22:01:30.164453   925 model.cc:91] Binding: data; isInput: true; dtype size: 4; bytes per batch item: 602112\n",
      "I0204 22:01:30.164479   925 model.cc:91] Binding: prob; isInput: false; dtype size: 4; bytes per batch item: 4000\n",
      "I0204 22:01:30.169529   925 inference_manager.cc:149] -- Registering Model: rn50-b1 --\n",
      "I0204 22:01:30.169546   925 inference_manager.cc:150] Input/Output Tensors require 591.9 KiB\n",
      "I0204 22:01:30.169550   925 inference_manager.cc:151] Execution Activations require 5.7 MiB\n",
      "I0204 22:01:30.169554   925 inference_manager.cc:155] Weights require 75.8 MiB\n",
      "I0204 22:01:30.223752   925 model.cc:91] Binding: data; isInput: true; dtype size: 4; bytes per batch item: 602112\n",
      "I0204 22:01:30.223776   925 model.cc:91] Binding: prob; isInput: false; dtype size: 4; bytes per batch item: 4000\n",
      "I0204 22:01:30.227011   925 inference_manager.cc:149] -- Registering Model: rn50-b8 --\n",
      "I0204 22:01:30.227035   925 inference_manager.cc:150] Input/Output Tensors require 4.6 MiB\n",
      "I0204 22:01:30.227041   925 inference_manager.cc:151] Execution Activations require 39.8 MiB\n",
      "I0204 22:01:30.227046   925 inference_manager.cc:155] Weights require 49.0 MiB\n"
     ]
    }
   ],
   "source": [
    "with display_output():\n",
    "    manager.register_tensorrt_engine(\"rn50-b1\", \"/work/models/ResNet-50-b1-fp16.engine\")\n",
    "    manager.register_tensorrt_engine(\"rn50-b8\", \"/work/models/ResNet-50-b8-fp16.engine\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "I0204 22:01:31.025523   925 inference_manager.cc:194] -- Allocating TensorRT Resources --\n",
      "I0204 22:01:31.025539   925 inference_manager.cc:195] Creating 4 TensorRT execution tokens.\n",
      "I0204 22:01:31.025542   925 inference_manager.cc:196] Creating a Pool of 8 Host/Device Memory Stacks\n",
      "I0204 22:01:31.025550   925 inference_manager.cc:197] Each Host Stack contains 4.7 MiB\n",
      "I0204 22:01:31.025554   925 inference_manager.cc:198] Each Device Stack contains 4.8 MiB\n",
      "I0204 22:01:31.025559   925 inference_manager.cc:199] Total GPU Memory: 197.5 MiB\n"
     ]
    }
   ],
   "source": [
    "with display_output():\n",
    "    manager.update_resources()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Local Inference Properties"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "b1 = manager.infer_runner(\"rn50-b1\")\n",
    "b8 = manager.infer_runner(\"rn50-b8\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'data': {'dtype': dtype('float32'), 'shape': [3, 224, 224]}}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b1.input_bindings()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b1.max_batch_size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'data': {'dtype': dtype('float32'), 'shape': [3, 224, 224]}}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b8.input_bindings()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b8.max_batch_size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def max_batch_size_shape(x, input='data'):\n",
    "    batch = [x.max_batch_size()]\n",
    "    batch.extend(x.input_bindings()[input]['shape'])\n",
    "    return batch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[8, 3, 224, 224]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max_batch_size_shape(b8)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compute\n",
    "\n",
    "Here we launch two async inferences with two different TensorRT engines, one built for batch1, the other for batch8.  While these are the same ResNet-50 models, they could be any two unique TensorRT engines.\n",
    "\n",
    "Note: for this example the weights of the model and the input tensors are all random values."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "futures = [model.infer(data=np.random.random_sample(max_batch_size_shape(model))) for model in [b1, b8]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# free to do other work while inference is being computed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "results = [f.get() for f in futures]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "prob binding has shape: (1, 1000, 1, 1)\n",
      "prob binding has shape: (8, 1000, 1, 1)\n"
     ]
    }
   ],
   "source": [
    "for result in results:\n",
    "    for output, tensor in result.items():\n",
    "        print(\"{} binding has shape: {}\".format(output, tensor.shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
